In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import geopandas as gpd
import matplotlib.pyplot as plt
import json
import plotly.graph_objects as go
from shapely.geometry import LineString
import plotly.io as pio
pio.renderers.default = 'notebook_connected'
from shapely import wkt
from geopy.distance import geodesic
In [2]:
#1) Load the GeoJSON file
SGmap = gpd.read_file('MasterPlan2019PlanningAreaBoundaryNoSea.geojson')
Average Monthly Household Income by Planning Area¶
In [3]:
#2) Create Map and Barplot of Average Monthly Household Income by Planning Area
SGincome = pd.read_csv("ResidentHouseholdsbyPlanningAreaofResidenceandMonthlyHouseholdIncomefromWorkCensusOfPopulation2020.csv")
MidIncomeRange = {'Below_1_000': 500,
'1_000_1_999': 1500,
'2_000_2_999': 2500,
'3_000_3_999': 3500,
'4_000_4_999': 4500,
'5_000_5_999': 5500,
'6_000_6_999': 6500,
'7_000_7_999': 7500,
'8_000_8_999': 8500,
'9_000_9_999': 9500,
'10_000_10_999': 10500,
'11_000_11_999': 11500,
'12_000_12_999': 12500,
'13_000_13_999': 13500,
'14_000_14_999': 14500,
'15_000_17_499': 16250,
'17_500_19_999': 18750,
'20_000andOver': 22000}
def total_estimated_income(row):
total_income = 0
for income_range, midpoint in MidIncomeRange.items():
if row[income_range] > 0:
total_income += row[income_range] * midpoint
return total_income
SGincome['TotalEstimatedIncome'] = SGincome.apply(total_estimated_income, axis=1)
SGincome['AvgIncome'] = (SGincome['TotalEstimatedIncome'] / SGincome['Total']).round(2)
# Make all planning area name uppercase to be merged on flat_df on PLN_AREA_N later
SGincome['Number'] = SGincome['Number'].str.upper()
# Load your GeoJSON file (or string)
with open('MasterPlan2019PlanningAreaBoundaryNoSea.geojson') as f:
data = json.load(f)
# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame.from_features(data["features"])
# parse HTML in Description
from bs4 import BeautifulSoup
def extract_attributes(description_html):
soup = BeautifulSoup(description_html, "html.parser")
rows = soup.find_all("tr")[1:] # skip header row
attributes = {}
for row in rows:
cols = row.find_all(["th", "td"])
if len(cols) == 2:
key = cols[0].text.strip()
val = cols[1].text.strip()
attributes[key] = val
return attributes
# Apply the extraction
desc_df = gdf["Description"].apply(extract_attributes).apply(pd.Series)
# Merge with main DataFrame
flat_df = pd.concat([gdf.drop(columns=["Description"]), desc_df], axis=1)
# Merge the income data with the flat_df GeoDataFrame
SGincomebyPA = flat_df.merge(SGincome, left_on='PLN_AREA_N', right_on='Number', how='left')
# Filter out rows where AvgIncome is NaN, we will use this as a separate Map to overlay on the main map
dfmissing = SGincomebyPA[SGincomebyPA['AvgIncome'].isna()].copy()
dfmissing["IncomeInfo"] = "No data"
#print(dfmissing['PLN_AREA_N'].unique()) # Check which Planning Areas have no income data
fig = go.Figure()
figdata = px.choropleth_map(
SGincomebyPA,
geojson=SGincomebyPA.geometry,
locations= SGincomebyPA.index,
color = "AvgIncome",
color_continuous_scale=px.colors.sequential.Plasma,
hover_name="PLN_AREA_N",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figNA = px.choropleth_map(
dfmissing,
geojson=dfmissing.geometry,
locations= dfmissing.index,
color_discrete_sequence=["grey"],
hover_name="PLN_AREA_N",
hover_data="IncomeInfo",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
fig = go.Figure(data = [*figdata.data, *figNA.data])
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10)
fig.write_html('fig.html')
fig.show()
In [4]:
SGincomebyPAsorted = SGincomebyPA.sort_values(by='AvgIncome', ascending=False)
SGincomebyPAsorted = SGincomebyPAsorted [~(SGincomebyPAsorted['AvgIncome'].isna())]
figbar = px.bar(SGincomebyPAsorted, x='PLN_AREA_N', y='AvgIncome', color='AvgIncome',
color_continuous_scale=px.colors.sequential.Plasma,
hover_name='PLN_AREA_N',
title='Average Monthly Household Income by Planning Area',
labels={'AvgIncome': 'Average Income'},
height=600)
figbar.show()
Population by Planning Area¶
In [5]:
# 3) Create Map and Barplot of Population Density by Planning Area
# Calculate Land Area in km2 of each planning area
gdf = gpd.read_file("MP14_PLNG_AREA_NO_SEA_PL.shp").to_crs(epsg=3414)
gdf["area_km2"] = gdf.geometry.area / 1e6
land_area = gdf[['PLN_AREA_N', 'area_km2']]
#print(land_area)
SGpop = pd.read_csv("ResidentPopulationbyPlanningAreaSubzoneofResidenceEthnicGroupandSexCensusofPopulation2020.csv")
SGpop['Total_Total'] = SGpop['Total_Total'].str.replace('-', '0').astype(int)
#print(SGpop[SGpop['Number']=="Central Water Catchment - Total"]) # check whether '-' is replaced with '0' correctly
SGPApop = SGpop[SGpop["Number"].str.contains("Total")].copy()
#SGPApop['PlanningArea'] = SGPApop['Number'].str.split('-').str[0].str.upper().str.strip()
#SGPApop['PlanningArea'] = SGPApop['Number'].str.strip(to_strip=' - Total').str.upper()
SGPApop['PlanningArea'] = SGPApop['Number'].str.replace(r'\s*-\s*Total', '', regex=True).str.upper().str.strip()
#print(SGPApop['PlanningArea'].unique()) # check Planning Area names
#print(len(SGPApop['PlanningArea'].unique())) # check Planning Area number
# Merge population data with the GeoDataFrame
SGpoparea = land_area.merge(SGPApop, left_on='PLN_AREA_N', right_on='PlanningArea', how='left')
SGpopbyPA = flat_df.merge(SGpoparea, left_on='PLN_AREA_N', right_on='PlanningArea', how='left')
SGpopbyPA.rename(columns={'Total_Total': 'Total Population'}, inplace=True)
SGpopbyPA.drop(columns=['PLN_AREA_N_x', 'PLN_AREA_C', 'CA_IND', 'REGION_N', 'REGION_C', 'INC_CRC', 'FMEL_UPD_D', 'Number', 'Total_Males', 'Total_Females', 'Chinese_Total', 'Chinese_Males', 'Chinese_Females', 'Malays_Total', 'Malays_Males', 'Malays_Females', 'Indians_Total', 'Indians_Males', 'Indians_Females', 'Others_Total', 'Others_Males', 'Others_Females'], inplace=True)
#print(SGpopbyPA)
#print(SGpopbyPA['PlanningArea'].unique()) # check Planning Area names
#print(len(SGpopbyPA['PlanningArea'].unique())) # check Planning Area names
# Plot Population by Planning Area
figpop = px.choropleth_map(
SGpopbyPA,
geojson=SGpopbyPA.geometry,
locations= SGpopbyPA.index,
color = 'Total Population',
color_continuous_scale=px.colors.sequential.YlOrRd,
hover_name='PlanningArea',
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figpop.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
figpop.show()
In [6]:
SGpopbyPAsorted = SGpopbyPA.sort_values(by='Total Population', ascending=False)
SGpopbyPAsorted = SGpopbyPAsorted[~(SGpopbyPAsorted['Total Population'].isna())]
figpopbar = px.bar(SGpopbyPAsorted, y='PlanningArea', x='Total Population', color='Total Population',
color_continuous_scale=px.colors.sequential.YlOrRd,
title='Total Resident Population by Planning Area',
hover_name='PlanningArea',
labels={'Total Population': 'Resident Population'},
height=1200)
figpopbar.show()
Population Density by Planning Area¶
In [7]:
#Compute and Map Population Density
SGpopbyPA['PopulationDensity'] = (SGpopbyPA['Total Population'] / SGpopbyPA['area_km2']).round(2)
figpopden = px.choropleth_map(
SGpopbyPA,
geojson=SGpopbyPA.geometry,
locations= SGpopbyPA.index,
color = 'PopulationDensity',
color_continuous_scale=px.colors.sequential.Viridis,
hover_name='PlanningArea',
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figpopden.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
figpopden.show()
In [8]:
SGpopdenbyPAsorted = SGpopbyPA.sort_values(by='PopulationDensity', ascending=False)
SGpopdenbyPAsorted = SGpopdenbyPAsorted[~(SGpopdenbyPAsorted['PopulationDensity'].isna())]
figpopdenbar = px.bar(SGpopdenbyPAsorted, y='PlanningArea', x='PopulationDensity', color='PopulationDensity',
color_continuous_scale=px.colors.sequential.Viridis,
title='Population Density by Planning Area',
hover_name='PlanningArea',
labels={'PopulationDensity': 'Population Density (people/km²)'},
height=1200)
figpopdenbar.show()
Subzones of Planning Areas¶
In [9]:
#1) Load the GeoJSON file
SGsubzone = gpd.read_file('SGsubzone.geojson')
#print(SGsubzone .head())
# Load your GeoJSON file (or string)
with open('SGsubzone.geojson') as f:
data = json.load(f)
# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame.from_features(data["features"])
# parse HTML in Description
from bs4 import BeautifulSoup
def extract_attributes(description_html):
soup = BeautifulSoup(description_html, "html.parser")
rows = soup.find_all("tr")[1:] # skip header row
attributes = {}
for row in rows:
cols = row.find_all(["th", "td"])
if len(cols) == 2:
key = cols[0].text.strip()
val = cols[1].text.strip()
attributes[key] = val
return attributes
# Apply the extraction
desc_df = gdf["Description"].apply(extract_attributes).apply(pd.Series)
# Merge with main DataFrame
SGsubzone = pd.concat([gdf.drop(columns=["Description"]), desc_df], axis=1)
# 1) General Subzone Map
figSZ = px.choropleth_map(
SGsubzone,
geojson=SGsubzone.geometry,
locations= SGsubzone.index,
hover_name="SUBZONE_N",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figSZ.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
figSZ.show()
Population by Subzone¶
In [10]:
# 2) Population by Subzone
SZpop = pd.read_csv('ResidentPopulationbyPlanningAreaSubzoneofResidenceEthnicGroupandSexCensusofPopulation2020.csv')
SZpop.drop(columns=['Total_Males', 'Total_Females',
'Chinese_Total', 'Chinese_Males', 'Chinese_Females', 'Malays_Total',
'Malays_Males', 'Malays_Females', 'Indians_Total', 'Indians_Males',
'Indians_Females', 'Others_Total', 'Others_Males', 'Others_Females'], inplace=True)
SZpop['Total_Total'] = SZpop['Total_Total'].str.replace('-', '0').astype(int)
SZpop = SZpop.rename(columns={'Number': 'Planning Area Subzone', 'Total_Total': 'Total Resident Population'})
SZpop['Planning Area Subzone'] = SZpop['Planning Area Subzone'].str.strip().str.upper()
#print(SZpop['Planning Area Subzone'])
#print(SGsubzone)
PlanningAreapop = SZpop[~SZpop['Planning Area Subzone'].isin(SGsubzone['SUBZONE_N'])]
Subzonespop = SZpop[SZpop['Planning Area Subzone'].isin(SGsubzone['SUBZONE_N'])]
SubzonespopGEO = SGsubzone.merge(Subzonespop, left_on='SUBZONE_N', right_on='Planning Area Subzone', how='left')
#print(SubzonespopGEO.head())
figSZpop = px.choropleth_map(
SubzonespopGEO,
geojson=SubzonespopGEO.geometry,
locations= SubzonespopGEO.index,
hover_name='Planning Area Subzone',
color='Total Resident Population',
color_continuous_scale=px.colors.sequential.YlOrRd,
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figSZpop.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
figSZpop.show()
In [11]:
# Compute Area of Subzone in km²
SubzonespopGEO['area_km2'] = SubzonespopGEO['geometry'].area * 10000
#print(SubzonespopGEO[['SUBZONE_N', 'area_km2']])
SubzonespopGEO['population_density'] = (SubzonespopGEO['Total Resident Population'] / SubzonespopGEO['area_km2']).round(2)
SubzonespopGEO[['SUBZONE_N', 'population_density']].sort_values(by='population_density', ascending=False)
figSZpop = px.choropleth_map(
SubzonespopGEO,
geojson=SubzonespopGEO.geometry,
locations= SubzonespopGEO.index,
hover_name='Planning Area Subzone',
color='population_density',
color_continuous_scale=px.colors.sequential.Viridis,
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figSZpop.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
figSZpop.show()
MRT Lines and Stations¶
In [12]:
## For combining mrt stations and mrt lines map
fig = go.Figure()
#1) Load the GeoJSON file
with open('MasterPlan2019RailStationlayerGEOJSON.geojson') as f:
data = json.load(f)
# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame.from_features(data["features"])
# parse HTML in Description
from bs4 import BeautifulSoup
def extract_attributes(description_html):
soup = BeautifulSoup(description_html, "html.parser")
rows = soup.find_all("tr")[1:] # skip header row
attributes = {}
for row in rows:
cols = row.find_all(["th", "td"])
if len(cols) == 2:
key = cols[0].text.strip()
val = cols[1].text.strip()
attributes[key] = val
return attributes
# Apply the extraction
desc_df = gdf["Description"].apply(extract_attributes).apply(pd.Series)
# Merge with main DataFrame
SGmrtstations = pd.concat([gdf.drop(columns=["Description"]), desc_df], axis=1)
#print(SGmrtstations)
## Remove interchange from the name so we can merge with the code dataset
SGmrtstations["NAME"] = SGmrtstations["NAME"].str.replace(r"\binterchange\b", "", case=False, regex=True).str.strip()
#print(SGmrtstations[SGmrtstations["NAME"].str.contains("Interchange", case=False, na=False)])
## 88, JALAN BESAR -> BENDEMEER
SGmrtstations.loc[88, 'NAME'] = 'BENDEMEER'
## 76, "" -> DOWNTOWN
SGmrtstations.loc[76, 'NAME'] = 'DOWNTOWN'
## 83, RIVER VALLEY -> FORT CANNING
SGmrtstations.loc[83, 'NAME'] = 'FORT CANNING'
## 194, JELEPANG -> JELAPANG
SGmrtstations.loc[194, 'NAME'] = 'JELAPANG'
## 53, ONE NORTH -> ONE-NORTH
SGmrtstations.loc[53, 'NAME'] = 'ONE-NORTH'
## 102, SUM KEE -> SAM KEE
SGmrtstations.loc[102, 'NAME'] = 'SAM KEE'
## 188 CHOA CHU KANG -> TEN MILE JUNCTION
SGmrtstations.loc[188, 'NAME'] = 'TEN MILE JUNCTION'
## 202 "" -> ORCHARD BOULEVARD
SGmrtstations.loc[202, 'NAME'] = 'ORCHARD BOULEVARD'
## 232 "" -> ORCHARD BOULEVARD
SGmrtstations.loc[232, 'NAME'] = 'WOODLANDS NORTH'
## 96 "" -> MAXWELL
SGmrtstations.loc[96, 'NAME'] = 'MAXWELL'
## 106 "" -> RESORTS WORLD
SGmrtstations.loc[106, 'NAME'] = 'RESORTS WORLD'
## 73 "" -> IMBIAH
SGmrtstations.loc[73, 'NAME'] = 'IMBIAH'
## 74 "" -> BEACH
SGmrtstations.loc[74, 'NAME'] = 'BEACH'
figstation = px.choropleth_map(
SGmrtstations,
geojson=SGmrtstations.geometry,
locations= SGmrtstations.index,
color_discrete_sequence=["darkblue"],
hover_name="NAME",
hover_data=["RAIL_TYPE"],
title="Bus Stops in Singapore",
labels={"NAME": "Station Name", "RAIL_TYPE": "Rail Type"},
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=1,
height=800,
map_style = 'carto-positron')
## mrt lines
with open('routes.citymapper.json', 'r', encoding='utf-8') as f:
data = json.load(f)
lines = []
for route in data['routes']:
route_name = route['name']
color = route['color']
for pattern in route.get('patterns', []):
pattern_name = pattern['name']
path = pattern['path']
if path:
line = LineString([(lon, lat) for lat, lon in path]) # careful: (lon, lat)
lines.append({
"route": route_name,
"pattern": pattern_name,
"color": color,
"geometry": line
})
SGmaproute = gpd.GeoDataFrame(lines, crs="EPSG:4326") # WGS84 for lat/lon
# First, convert LineString geometry into separate lon/lat lists
SGmaproute["lon"] = SGmaproute.geometry.apply(lambda x: list(x.coords.xy[0]))
SGmaproute["lat"] = SGmaproute.geometry.apply(lambda x: list(x.coords.xy[1]))
exploded_rows = []
for _, row in SGmaproute.iterrows():
df = pd.DataFrame({
"route": row["route"],
"pattern": row["pattern"],
"lon": row["lon"],
"lat": row["lat"]
})
exploded_rows.append(df)
SGmaproute_exploded = pd.concat(exploded_rows, ignore_index=True)
# Add a line_group id so each line is drawn properly
SGmaproute_exploded["line_id"] = SGmaproute_exploded["route"] + " | " + SGmaproute_exploded["pattern"]
color_map = {
"CC": "#fa9e0d", # Circle Line
"NS": "#d42e12", # North South Line
"NE": "#9900aa", # North East Line
"DT": "#005ec4", # Downtown Line
"EW": "#009645", # East West Line
"TE": "#784008", # Thomson-East Coast Line
"BPLRT": "#748477",
"PGLRT (East)": "#748477",
"PGLRT (West)": "#748477",
"SKLRT (West)": "#748477",
"SKLRT (East)":"#748477"
}
figmrtroute = px.line_map(
SGmaproute_exploded,
lat="lat",
lon="lon",
color="route",
color_discrete_map=color_map,
line_group="line_id",
hover_name="pattern",
hover_data=["route"],
title="MRT Routes in Singapore",
labels={"route": "Route Name", "pattern": "Pattern Name"},
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
height=800,
map_style="carto-positron"
)
fig = go.Figure(data = [*figstation.data, *figmrtroute.data])
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
height=700,
map_zoom=10.5)
Employment Density (Working People/Area)¶
In [13]:
gdf = gpd.read_file("MP14_PLNG_AREA_NO_SEA_PL.shp").to_crs(epsg=3414)
gdf["area_km2"] = gdf.geometry.area / 10**6
land_area = gdf[['PLN_AREA_N', 'area_km2','geometry']]
land_area = land_area.to_crs(epsg=4326)
#1) Load the GeoJSON file
SGWorkbyPA = pd.read_csv('EmployedResidentsAged15YearsandOverbyPlanningAreaofWorkplaceandUsualModeofTransporttoWorkCensusofPopulation2020.csv')
# Convert columns to numeric
SGWorkbyPA[['MRT_LRTOnly','MRT_LRTandPublicBusOnly', 'OthercombinationsofMRT_LRTorPublicBus']] = SGWorkbyPA[['MRT_LRTOnly','MRT_LRTandPublicBusOnly', 'OthercombinationsofMRT_LRTorPublicBus']].replace("-", 0).astype(int)
df_Workingpop = SGWorkbyPA[['Number', 'Total']].copy()
df_Workingpop["Number"] = df_Workingpop["Number"].str.upper().str.strip()
df_Workingpopdense = land_area.merge(df_Workingpop, left_on='PLN_AREA_N', right_on="Number", how="left")
#print(df_Workingpopdense)
df_Workingpopdense["Working Population Density"] = (df_Workingpopdense["Total"] / df_Workingpopdense["area_km2"]).round(2)
dfmissing = df_Workingpopdense[df_Workingpopdense['Total'].isna()].copy()
dfmissing["Total"] = 0
dfmissing["Working Population Density"] = 0
dfmissing["Working Population Density Info"] = "No data"
fig1 = go.Figure()
figdata1 = px.choropleth_map(
df_Workingpopdense,
geojson=df_Workingpopdense.geometry,
locations= df_Workingpopdense.index,
color = "Working Population Density",
color_continuous_scale=px.colors.sequential.Viridis,
hover_name="PLN_AREA_N",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figNA1 = px.choropleth_map(
dfmissing,
geojson=dfmissing.geometry,
locations= dfmissing.index,
color_discrete_sequence=["grey"],
hover_name="PLN_AREA_N",
hover_data="Working Population Density Info",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figWPD = go.Figure(data = [*figdata1.data, *figNA1.data])
figWPD.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10)
figWPD.show()
MRT User Ratio¶
In [14]:
df_mrt = SGWorkbyPA[['Number', 'Total', 'MRT_LRTOnly','MRT_LRTandPublicBusOnly', 'OthercombinationsofMRT_LRTorPublicBus']].copy()
# Sum up all mrt users
def total_mrt_users(row):
total_user = 0
total_user += row['MRT_LRTOnly']
total_user += row['MRT_LRTandPublicBusOnly']
total_user += row['OthercombinationsofMRT_LRTorPublicBus']
return total_user
df_mrt['Total MRT User'] = df_mrt.apply(total_mrt_users, axis=1)
df_mrt["Ratio_of_MRT_Users"] = (df_mrt['Total MRT User'] / df_mrt['Total']).round(3)
df_mrt["Number"] = df_mrt["Number"].str.upper().str.strip()
df_mrt_ratio = land_area.merge(df_mrt, left_on='PLN_AREA_N', right_on="Number", how="left")
#print(df_mrt_ratio)
dfmissingmrt = df_mrt_ratio[df_mrt_ratio['Total MRT User'].isna()].copy()
dfmissingmrt["Total MRT User"] = 0
dfmissingmrt["Ratio_of_MRT_Users"] = 0
dfmissingmrt["Ratio_of_MRT_Users Info"] = "No data"
#print(dfmissingmrt)
fig2 = go.Figure()
figdata2 = px.choropleth_map(
df_mrt_ratio,
geojson=df_mrt_ratio.geometry,
locations= df_mrt_ratio.index,
color = "Ratio_of_MRT_Users",
color_continuous_scale=px.colors.sequential.YlGnBu,
hover_name="PLN_AREA_N",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figNA2 = px.choropleth_map(
dfmissingmrt,
geojson=dfmissingmrt.geometry,
locations= dfmissingmrt.index,
color_discrete_sequence=["grey"],
hover_name="PLN_AREA_N",
hover_data="Ratio_of_MRT_Users Info",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figMRTr = go.Figure(data = [*figdata2.data, *figNA2.data], layout=figdata2.layout)
figMRTr.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10)
figMRTr.show()
📍 Singapore MRT Morning Rush Hour Analysis¶
In [15]:
df = pd.read_csv("RidershipbyTrainStation.csv")
unique_count = df['PT_CODE'].nunique()
#print(unique_count)
mrt_stations = df['PT_CODE'].unique()
#print(mrt_stations)
#print(df[(df['PT_CODE']=="EW1") & (df['DAY_TYPE']=="WEEKDAY")])
#print(df[(df['PT_CODE']=="EW1") & (df['DAY_TYPE']=="WEEKENDS/HOLIDAY")])
morningrushhour=(df[(df['PT_CODE'] == "EW1") &
(df['DAY_TYPE'] == "WEEKDAY") &
(df['TIME_PER_HOUR'] >= 7) &
(df['TIME_PER_HOUR'] <= 9)])
eveningrushhour=(df[(df['PT_CODE'] == "EW1") &
(df['DAY_TYPE'] == "WEEKDAY") &
(df['TIME_PER_HOUR'] >= 17) &
(df['TIME_PER_HOUR'] <= 20)])
#print(eveningrushhour)
rushhour_data = []
for ST in mrt_stations:
morning_tapin = df[
(df['PT_CODE'] == ST) &
(df['DAY_TYPE'] == "WEEKDAY") &
(df['TIME_PER_HOUR'] >= 7) &
(df['TIME_PER_HOUR'] <= 9)]['TOTAL_TAP_IN_VOLUME'].sum()
morning_tapout = df[
(df['PT_CODE'] == ST) &
(df['DAY_TYPE'] == "WEEKDAY") &
(df['TIME_PER_HOUR'] >= 7) &
(df['TIME_PER_HOUR'] <= 9)]['TOTAL_TAP_OUT_VOLUME'].sum()
evening_tapin = df[
(df['PT_CODE'] == ST) &
(df['DAY_TYPE'] == "WEEKDAY") &
(df['TIME_PER_HOUR'] >= 17) &
(df['TIME_PER_HOUR'] <= 20)]['TOTAL_TAP_IN_VOLUME'].sum()
evening_tapout = df[
(df['PT_CODE'] == ST) &
(df['DAY_TYPE'] == "WEEKDAY") &
(df['TIME_PER_HOUR'] >= 17) &
(df['TIME_PER_HOUR'] <= 20)]['TOTAL_TAP_OUT_VOLUME'].sum()
rushhour_data.append({
'Train Station': ST,
'Average Morning Tap In': morning_tapin,
'Average Morning Tap Out': morning_tapout,
'Average Evening Tap In': evening_tapin,
'Average Evening Tap Out': evening_tapout
})
rushhour = pd.DataFrame(rushhour_data)
rushhour['Morning_OutIn_Ratio'] = rushhour['Average Morning Tap Out'] / rushhour['Average Morning Tap In']
rushhour['Evening_OutIn_Ratio'] = rushhour['Average Evening Tap Out'] / rushhour['Average Evening Tap In']
#print(rushhour[['Train Station', 'Morning_OutIn_Ratio']].sort_values("Morning_OutIn_Ratio", ascending=False))
#print(rushhour[['Train Station', 'Evening_OutIn_Ratio']].sort_values('Evening_OutIn_Ratio', ascending=False))
#print(rushhour)
## merge rushhour dataset (with station codes like "CC4/DT15") with your SGTrainStationInfo GeoDataFrame (which has individual codes like "CC4", "DT15").
# Step 1: Create a list of codes
rushhour['Train Station'] = rushhour['Train Station'].str.split('/')
# Step 2: Explode into multiple rows, one per code
rushhour_exploded = rushhour.explode('Train Station').reset_index(drop=True)
rushhour_exploded.rename(columns={'Train Station': 'SingleCode'}, inplace=True)
with open('MasterPlan2019RailStationlayerGEOJSON.geojson') as f:
data = json.load(f)
# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame.from_features(data["features"])
# parse HTML in Description
from bs4 import BeautifulSoup
def extract_attributes(description_html):
soup = BeautifulSoup(description_html, "html.parser")
rows = soup.find_all("tr")[1:] # skip header row
attributes = {}
for row in rows:
cols = row.find_all(["th", "td"])
if len(cols) == 2:
key = cols[0].text.strip()
val = cols[1].text.strip()
attributes[key] = val
return attributes
# Apply the extraction
desc_df = gdf["Description"].apply(extract_attributes).apply(pd.Series)
# Merge with main DataFrame
SGmrtstations = pd.concat([gdf.drop(columns=["Description"]), desc_df], axis=1)
## Print out the stations with interchange in their name
#print(SGmrtstations[SGmrtstations["NAME"].str.contains("Interchange", case=False, na=False)])
## Remove interchange from the name so we can merge with the code dataset
SGmrtstations["NAME"] = SGmrtstations["NAME"].str.replace(r"\binterchange\b", "", case=False, regex=True).str.strip()
#print(SGmrtstations[SGmrtstations["NAME"].str.contains("Interchange", case=False, na=False)])
## 88, JALAN BESAR -> BENDEMEER
SGmrtstations.loc[88, 'NAME'] = 'BENDEMEER'
## 76, "" -> DOWNTOWN
SGmrtstations.loc[76, 'NAME'] = 'DOWNTOWN'
## 83, RIVER VALLEY -> FORT CANNING
SGmrtstations.loc[83, 'NAME'] = 'FORT CANNING'
## 194, JELEPANG -> JELAPANG
SGmrtstations.loc[194, 'NAME'] = 'JELAPANG'
## 53, ONE NORTH -> ONE-NORTH
SGmrtstations.loc[53, 'NAME'] = 'ONE-NORTH'
## 102, SUM KEE -> SAM KEE
SGmrtstations.loc[102, 'NAME'] = 'SAM KEE'
## 188 CHOA CHU KANG -> TEN MILE JUNCTION
SGmrtstations.loc[188, 'NAME'] = 'TEN MILE JUNCTION'
## 202 "" -> ORCHARD BOULEVARD
SGmrtstations.loc[202, 'NAME'] = 'ORCHARD BOULEVARD'
## 232 "" -> ORCHARD BOULEVARD
SGmrtstations.loc[232, 'NAME'] = 'WOODLANDS NORTH'
## 96 "" -> MAXWELL
SGmrtstations.loc[96, 'NAME'] = 'MAXWELL'
leftdf = sorted(list(SGmrtstations["NAME"].unique()))
#print(leftdf)
STcode_name = pd.read_excel("Train Station Codes and Chinese Names.xls")
STcode_name['mrt_station_english'] = STcode_name['mrt_station_english'].str.upper().str.strip()
rightdf = sorted(list(STcode_name['mrt_station_english'].unique()))
#print(rightdf)
right_not_in_left = [i for i in rightdf if i not in leftdf]
#print((right_not_in_left))
## To check which is still not in left df Eg: ['DOWNTOWN', 'FORT CANNING', 'JELAPANG', 'ONE-NORTH', 'SAM KEE', 'TEN MILE JUNCTION'] is still not in the df
#print(SGmrtstations["NAME"])
SGTrainStationInfo = SGmrtstations.merge(STcode_name, left_on="NAME", right_on="mrt_station_english", how="left")
#print(SGTrainStationInfo[SGTrainStationInfo["mrt_station_english"].isna()]['NAME'])
# remove all unopened stations (Cross Island Line, Jurong Regional Line etc)
SGTrainStationInfo = SGTrainStationInfo.dropna(subset=["mrt_station_english"])
SGTrainStationInfo.drop(columns=['GRND_LEVEL', 'INC_CRC', 'FMEL_UPD_D'], inplace=True)
#print(SGTrainStationInfo[SGTrainStationInfo['stn_code']=="DT15"])
#print(SGTrainStationInfo[SGTrainStationInfo['stn_code']=="CC4"])
############ Cleaned Full Train Station Dataset!! #########################3
merged = rushhour_exploded.merge(
SGTrainStationInfo,
left_on='SingleCode',
right_on='stn_code',
how='left'
)
#print(merged)
merged['Total_Morning_Taps'] = merged['Average Morning Tap In'] + merged['Average Morning Tap Out']
merged['Total_Day_Taps'] = (merged['Average Morning Tap In'] + merged['Average Morning Tap Out'] + merged['Average Evening Tap In'] + merged['Average Evening Tap Out'])
merged['Total_Evening_Taps'] = merged['Average Evening Tap In'] + merged['Average Evening Tap Out']
merged = gpd.GeoDataFrame(merged, geometry='geometry')
merged['lon'] = merged.geometry.centroid.x
merged['lat'] = merged.geometry.centroid.y
## visualize Morning Out/In Ratio as color, and tap-ins as marker size:
fig = px.scatter_map(
merged,
lat='lat',
lon='lon',
size='Total_Morning_Taps',
color='Morning_OutIn_Ratio',
hover_name='mrt_station_english',
hover_data={
'SingleCode': True,
'Average Morning Tap In': True,
'Average Morning Tap Out': True,
'Morning_OutIn_Ratio': ':.2f',
'lon': False,
'lat': False
},
color_continuous_scale=px.colors.sequential.YlOrRd,
size_max=30,
zoom=10.5,
height=700
)
fig.update_layout(
map_style='carto-darkmatter',
title='📍 Singapore MRT Morning Rush Hour Analysis',
margin={"r":0,"t":0,"l":0,"b":0}
)
fig.show()
Singapore MRT Morning Rush Hour Average Number of Tap Ins¶
In [16]:
fig = px.scatter_map(
merged,
lat='lat',
lon='lon',
size='Average Morning Tap In',
hover_name='mrt_station_english',
hover_data={
'SingleCode': True,
'Average Morning Tap In': True,
'Average Morning Tap Out': True,
'Morning_OutIn_Ratio': ':.2f',
'lon': False,
'lat': False
},
size_max=30,
zoom=10.5,
height=700
)
fig.update_layout(
map_style='carto-positron',
title='📍 Singapore MRT Morning Rush Hour Average Number of Tap Ins',
margin={"r":0,"t":0,"l":0,"b":0}
)
fig.show()
Singapore MRT Morning Rush Hour Average Number of Tap Outs¶
In [17]:
fig1 = px.scatter_map(
merged,
lat='lat',
lon='lon',
size='Average Morning Tap Out',
hover_name='mrt_station_english',
hover_data={
'SingleCode': True,
'Average Morning Tap In': True,
'Average Morning Tap Out': True,
'Morning_OutIn_Ratio': ':.2f',
'lon': False,
'lat': False
},
size_max=30,
zoom=10.5,
height=700
)
fig1.update_layout(
map_style='carto-positron',
title='📍 Singapore MRT Morning Rush Hour Average Number of Tap Outs',
margin={"r":0,"t":0,"l":0,"b":0}
)
fig1.show()
In [18]:
gdf = pd.read_csv('SGsubzones.csv')
gdf["geometry"] = gdf["geometry"].apply(wkt.loads)
## Convert WKT strings to actual Polygon geometries
SGsubzones = gpd.GeoDataFrame(gdf, geometry="geometry", crs="EPSG:3414")
# Centroids of subzones
SGsubzones["centroid"] = SGsubzones.geometry.centroid
SGsubzones["lat"] = SGsubzones.centroid.y
SGsubzones["lon"] = SGsubzones.centroid.x
#print(SGsubzones)
# Import MRT Station data
gdf1 = pd.read_csv("Complete MRT Data.csv")
gdf1["geometry"] = gdf1["geometry"].apply(wkt.loads)
SGmrtstationsoperational = gpd.GeoDataFrame(gdf1, geometry="geometry", crs="EPSG:3414")
SGmrtstations = SGmrtstations.merge(SGmrtstationsoperational[["NAME","stn_code"]], on="NAME", how="left")
# Centroids of mrt stations
SGmrtstations["centroid"] = SGmrtstations.geometry.centroid
SGmrtstations["lat"] = SGmrtstations.centroid.y
SGmrtstations["lon"] = SGmrtstations.centroid.x
# Coordinates of mrt
#mrt_coords = SGmrtstations[["lat", "lon"]].values
#print(mrt_coords)
mrt_coords = list(zip(SGmrtstations["NAME"],SGmrtstations["stn_code"], SGmrtstations["lat"], SGmrtstations["lon"]))
# nearest_mrt_distance() calculates the shortest distance (in km) between a given subzone's centroid and the list of MRT station coordinates.
## geodesic measures the real-world (spherical) distance between two GPS points.
def nearest_mrt_info(row, mrt_coords):
subzone_coord = (row["lat"], row["lon"])
# Calculate distances and keep name & code
distances = [(name,code, geodesic(subzone_coord, (lat, lon)).km) for name, code, lat, lon in mrt_coords]
# Find the MRT with the minimum distance
nearest_station, stn_code, min_distance = min(distances, key=lambda x: x[2])
return pd.Series([nearest_station, stn_code, min_distance])
SGsubzones[["nearest_mrt__station_name", "station_code", "dist_to_nearest_mrt_km"]] = SGsubzones.apply(
nearest_mrt_info, axis=1, args=(mrt_coords,)
)
SGsubzones["dist_to_nearest_mrt_km"] = SGsubzones["dist_to_nearest_mrt_km"].round(3)
SGdistomrt = SGsubzones.sort_values(by="dist_to_nearest_mrt_km", ascending=False)
#print(SGdistomrt)
Distance to Nearest MRT by Subzones¶
In [19]:
# 1. Merge SubzonespopGEO and SGdistomrt (both have subzone):
merged2 = SubzonespopGEO.merge(
SGdistomrt[['PLN_AREA_N', 'SUBZONE_N','dist_to_nearest_mrt_km', 'nearest_mrt__station_name', 'station_code']],
on=['PLN_AREA_N', 'SUBZONE_N'],
how='left'
)
# 2. Merge df_Workingpopdense (planning area level):
merged2 = merged2.merge(
df_Workingpopdense[['PLN_AREA_N', 'Working Population Density']],
on='PLN_AREA_N',
how='left'
)
#print(merged2.columns)
filtered = merged2[
(merged2['Total Resident Population'] >= 1000) |
(merged2['Working Population Density'] >= 1000)
]
## Remove Peripherial Islands
filtered = filtered[filtered['SUBZONE_N'] != 'SOUTHERN GROUP']
## Add Tengah
filtered= pd.concat([filtered, merged2[merged2['PLN_AREA_N'] == 'TENGAH']], ignore_index=True)
## Add Tuas
filtered= pd.concat([filtered, merged2[merged2['PLN_AREA_N'] == 'TUAS']], ignore_index=True)
## Remove Tuas View Extension
filtered = filtered[filtered['SUBZONE_N'] != 'TUAS VIEW EXTENSION']
## Add Marina Bay Subzones
filtered= pd.concat([filtered, merged2[merged2['PLN_AREA_N'] == 'STRAITS VIEW']], ignore_index=True)
filtered= pd.concat([filtered, merged2[merged2['PLN_AREA_N'] == 'MARINA SOUTH']], ignore_index=True)
filtered= pd.concat([filtered, merged2[merged2['PLN_AREA_N'] == 'MARINA EAST']], ignore_index=True)
## Add Paterson
filtered= pd.concat([filtered, merged2[merged2['SUBZONE_N'] == 'PATERSON']], ignore_index=True)
## Add Seletar & Aviation Park
filtered= pd.concat([filtered, merged2[merged2['SUBZONE_N'] == 'SELETAR']], ignore_index=True)
filtered= pd.concat([filtered, merged2[merged2['SUBZONE_N'] == 'SELETAR AEROSPACE PARK']], ignore_index=True)
# Create geodataframe of subzones that dont need to be shown on the map using Anti join.
df1 = merged2.merge(filtered, on='SUBZONE_N', how="left", indicator=True)
SZ_list = df1.loc[df1['_merge']=="left_only", 'SUBZONE_N']
dfnodatashown = merged2[merged2['SUBZONE_N'].isin(SZ_list)]
#print(dfnodatashown.columns)
SGdistomrttop30 = filtered.sort_values("dist_to_nearest_mrt_km", ascending=False).head(30)
#print(SGdistomrttop30[['SUBZONE_N', "dist_to_nearest_mrt_km", 'nearest_mrt__station_name', 'station_code']])
#print(filtered.columns)
In [20]:
figMRTdist = go.Figure()
figmrtroutedotted = px.line_map(
SGmaproute_exploded,
lat="lat",
lon="lon",
color="route",
color_discrete_map=color_map,
line_group="line_id",
hover_name="pattern",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
height=600,
map_style="carto-positron",
)
figNA3 = px.choropleth_map(
dfnodatashown,
geojson=dfnodatashown.geometry,
locations= dfnodatashown.index,
hover_name="SUBZONE_N",
color_discrete_sequence=["grey"],
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figdisttomrt = px.choropleth_map(
filtered,
geojson=filtered.geometry,
locations= filtered.index,
hover_name="SUBZONE_N",
hover_data={
'dist_to_nearest_mrt_km': True,
'nearest_mrt__station_name': True,
'station_code': True},
color = 'dist_to_nearest_mrt_km',
color_continuous_scale=px.colors.sequential.YlOrRd,
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figMRTdist = go.Figure(data = [*figNA3.data, *figdisttomrt.data, *figstation.data, *figmrtroutedotted.data], layout=figdisttomrt.layout)
figMRTdist.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10,
showlegend=False)
In Singapore urban planning terms, the practical distance to an MRT station is typically defined by walkability:
✅ General Guidelines for “Far” from MRT: Distance Practical Meaning ≤ 400m Very close, ~5-minute walk — ideal distance targeted in urban planning. 400m–800m Comfortable walking distance, ~5–10 minutes. Still considered accessible.
800m–1km Less convenient, ~12–15 minutes walk — often supplemented with feeder buses. 1 km Considered far by Singapore standards — generally relies on bus services, not walking.
MAP Of Underserved Subzones¶
In [21]:
df = filtered.copy()
df = df.merge(df_mrt_ratio[['PLN_AREA_N', 'Ratio_of_MRT_Users']], on='PLN_AREA_N', how='left')
In [22]:
# Define threshold values
# These thresholds are examples; change them based on your domain expertise
distance_threshold = 0.8 # in km, e.g., more than 0.8 km from an MRT station
# Get quantiles for MRT user ratio
low_mrt_ratio_threshold = df['Ratio_of_MRT_Users'].quantile(0.3)
high_mrt_ratio_threshold = df['Ratio_of_MRT_Users'].quantile(0.7)
# Use median of population density as a simple threshold for sufficiently dense areas
pop_density_threshold = df['population_density'].median()
# Define condition for "suppressed demand" (low MRT usage but far from MRT and densely populated)
condition_suppressed = (
(df['Ratio_of_MRT_Users'] < low_mrt_ratio_threshold) &
(df['dist_to_nearest_mrt_km'] > distance_threshold) &
(df['population_density'] > pop_density_threshold)
)
# Define condition for "overloaded demand" (high MRT usage despite being far from MRT and densely populated)
condition_overloaded = (
(df['Ratio_of_MRT_Users'] > high_mrt_ratio_threshold) &
(df['dist_to_nearest_mrt_km'] > distance_threshold) &
(df['population_density'] > pop_density_threshold)
)
# Combine both conditions for MRT Access Gap:
# A subzone is flagged as having an MRT access gap if either condition is met.
df['MRT_Access_Gap'] = np.where(condition_suppressed | condition_overloaded, True, False)
In [23]:
#print(df[['SUBZONE_N','MRT_Access_Gap']][df['MRT_Access_Gap']==True])
dfGAP = df[df['MRT_Access_Gap']==True]
dfFine = df[df['MRT_Access_Gap']==False]
In [24]:
figMRTdist = go.Figure()
figNA4 = px.choropleth_map(
dfFine,
geojson=dfFine.geometry,
locations= dfFine.index,
hover_name="SUBZONE_N",
color_discrete_sequence=["grey"],
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figunderserved = px.choropleth_map(
dfGAP,
geojson=dfGAP.geometry,
locations= dfGAP.index,
hover_name="SUBZONE_N",
hover_data={
'dist_to_nearest_mrt_km': True,
'nearest_mrt__station_name': True,
'station_code': True},
color_discrete_sequence=["red"],
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figMRTdist = go.Figure(data = [*figNA4.data, *figunderserved.data, *figstation.data, *figNA3.data], layout=figunderserved.layout)
figMRTdist.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10,)
K Means Clustering¶
In [25]:
KMeansDF = filtered.merge(
df_mrt,
left_on='PLN_AREA_N',
right_on='Number',
how='left'
)
KMeansDF.drop(columns=['Number'], inplace=True)
#print(KMeansDF[KMeansDF['Ratio_of_MRT_Users'].isna()])
KMeansDF1 = KMeansDF[~KMeansDF['Ratio_of_MRT_Users'].isna()]
NAKmeans = KMeansDF[KMeansDF['Ratio_of_MRT_Users'].isna()]
In [26]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Step 1: Select and copy relevant columns
cols_to_use = ['dist_to_nearest_mrt_km', 'population_density', 'Working Population Density', 'Ratio_of_MRT_Users']
gdf_cluster = KMeansDF1.copy()
# Optional: Invert MRT usage if lower usage means more underserved
gdf_cluster['mrt_user_inverse'] = 1 - gdf_cluster['Ratio_of_MRT_Users']
# Step 2: Scaling
scaler = StandardScaler()
scaled_features = scaler.fit_transform(gdf_cluster[['dist_to_nearest_mrt_km', 'population_density', 'Working Population Density', 'mrt_user_inverse']])
# Step 3: Clustering (you can change n_clusters)
kmeans = KMeans(n_clusters=4, random_state=42)
gdf_cluster['cluster'] = kmeans.fit_predict(scaled_features)
gdf_cluster['cluster'] = gdf_cluster['cluster'].astype('str')
In [27]:
fig = px.scatter_3d(
gdf_cluster,
x='dist_to_nearest_mrt_km',
y='population_density',
z='Working Population Density',
color='cluster',
color_discrete_map={
"0": "red",
"1": "blue",
"2": "yellow",
"3": "green"
},
category_orders = {'cluster':["0","1","2","3"]},
opacity=0.5,
hover_name='SUBZONE_N',
title='Subzones Clustered by MRT Accessibility, Population Density and Working Population Density',
labels={
'PLN_AREA_N' : 'Planning Area',
'dist_to_nearest_mrt_km': 'Distance to Nearest MRT (km)',
'population_density': 'Population Density (per km²)',
'Working Population Density': 'Working Population Density (per km²)',
'cluster': 'Cluster'
},
size_max=2,
height=700
)
fig.update_layout(title='3D Clustering of Subzones', margin=dict(l=0, r=0, b=0, t=0))
fig.show()
In [28]:
fig = px.scatter_matrix(
gdf_cluster,
dimensions=['dist_to_nearest_mrt_km', 'population_density', 'Working Population Density', 'mrt_user_inverse'],
color='cluster',
color_discrete_map={
"0": "red",
"1": "blue",
"2": "yellow",
"3": "green"
},
category_orders = {'cluster':["0","1","2","3"]},
title='Clustered Subzones: Pairwise Feature Comparison',
labels={'cluster': 'Cluster'},
height=1000
)
fig.update_traces(diagonal_visible=False)
fig.show()
In [29]:
zone_type = {
"0" : "Regional & Mixed-Use Centres",
"1" : 'Mature HDB Heartlands',
"2" : "Central Business District (CBD)",
"3" : "Emerging / Underserved Zones"
}
gdf_cluster["Cluster Zone Type"] = gdf_cluster['cluster'].astype(str).map(zone_type)
print(gdf_cluster["Cluster Zone Type"])
0 Regional & Mixed-Use Centres
1 Regional & Mixed-Use Centres
2 Regional & Mixed-Use Centres
3 Central Business District (CBD)
4 Central Business District (CBD)
...
300 Emerging / Underserved Zones
301 Emerging / Underserved Zones
302 Emerging / Underserved Zones
307 Emerging / Underserved Zones
308 Emerging / Underserved Zones
Name: Cluster Zone Type, Length: 294, dtype: object
In [30]:
figK = go.Figure()
figNAK = px.choropleth_map(
NAKmeans,
geojson=NAKmeans.geometry,
locations= NAKmeans.index,
hover_name="SUBZONE_N",
color_discrete_sequence=["grey"],
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
figKluster = px.choropleth_map(
gdf_cluster,
geojson=gdf_cluster.geometry,
locations=gdf_cluster.index,
hover_name="SUBZONE_N",
hover_data={"Cluster Zone Type"},
color = 'cluster',
color_discrete_map={
"0": "red",
"1": "blue",
"2": "yellow",
"3": "green"
},
category_orders = {'cluster':["0","1","2","3"]},
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.4,
height=600,
map_style = 'carto-positron')
figK = go.Figure(data = [*figNAK.data, *figKluster.data, *figNA3.data], layout=figKluster.layout)
figK.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10,)
In [31]:
gdf_cluster.groupby('cluster')[['dist_to_nearest_mrt_km', 'population_density', 'Working Population Density', 'Ratio_of_MRT_Users']].mean()
Out[31]:
| dist_to_nearest_mrt_km | population_density | Working Population Density | Ratio_of_MRT_Users | |
|---|---|---|---|---|
| cluster | ||||
| 0 | 0.460729 | 6935.100353 | 9117.760471 | 0.520259 |
| 1 | 0.467292 | 33420.074271 | 3575.596250 | 0.362344 |
| 2 | 0.201250 | 1175.575625 | 59607.884375 | 0.681437 |
| 3 | 1.129031 | 4491.511959 | 2391.653608 | 0.325351 |
In [32]:
dfGAPcluster = dfGAP.merge(gdf_cluster[["SUBZONE_N",'mrt_user_inverse', "cluster", "Cluster Zone Type"]], on="SUBZONE_N", how="left")
print(dfGAPcluster[["SUBZONE_N", "cluster", "Cluster Zone Type"]])
print(dfGAPcluster.columns)
SUBZONE_N cluster Cluster Zone Type
0 DOVER 0 Regional & Mixed-Use Centres
1 FRANKEL 3 Emerging / Underserved Zones
2 BALESTIER 0 Regional & Mixed-Use Centres
3 FLORA DRIVE 3 Emerging / Underserved Zones
4 TAI SENG 3 Emerging / Underserved Zones
5 PASIR RIS WEST 3 Emerging / Underserved Zones
6 TRAFALGAR 1 Mature HDB Heartlands
7 YISHUN EAST 1 Mature HDB Heartlands
8 SEMBAWANG NORTH 3 Emerging / Underserved Zones
9 NORTHLAND 3 Emerging / Underserved Zones
Index(['geometry', 'Name', 'SUBZONE_NO', 'SUBZONE_N', 'SUBZONE_C', 'CA_IND',
'PLN_AREA_N', 'PLN_AREA_C', 'REGION_N', 'REGION_C', 'INC_CRC',
'FMEL_UPD_D', 'Planning Area Subzone', 'Total Resident Population',
'area_km2', 'population_density', 'dist_to_nearest_mrt_km',
'nearest_mrt__station_name', 'station_code',
'Working Population Density', 'Ratio_of_MRT_Users', 'MRT_Access_Gap',
'mrt_user_inverse', 'cluster', 'Cluster Zone Type'],
dtype='object')
In [33]:
figGAPKluster = px.choropleth_map(
dfGAPcluster,
geojson=dfGAPcluster.geometry,
locations=dfGAPcluster.index,
hover_name="SUBZONE_N",
hover_data={"Cluster Zone Type", 'PLN_AREA_N'},
color = 'cluster',
color_discrete_map={
"0": "red",
"1": "blue",
"2": "yellow",
"3": "green"
},
category_orders = {'cluster':["0","1","2","3"]},
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.8,
height=700,
map_style = 'carto-positron')
figGAPKluster.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10,)
In [34]:
df2 = gdf_cluster.merge(dfGAPcluster, on="SUBZONE_N", how="left", indicator=True)
szlist = df2.loc[df2["_merge"] == "left_only", 'SUBZONE_N']
dfNonTarget = gdf_cluster[gdf_cluster['SUBZONE_N'].isin(szlist)]
## Compute centroid coordinate of targeted subzones
SGsubzone["centroid"] = SGsubzone.geometry.centroid
SGsubzone["lat"] = SGsubzone.centroid.y
SGsubzone["lon"] = SGsubzone.centroid.x
print(SGsubzone[["SUBZONE_N","centroid"]])
SUBZONE_N centroid 0 DEPOT ROAD POINT (103.80856 1.28222) 1 BUKIT MERAH POINT (103.81859 1.28201) 2 CHINATOWN POINT (103.84369 1.27997) 3 PHILLIP POINT (103.84865 1.28528) 4 RAFFLES PLACE POINT (103.85101 1.28372) .. ... ... 327 UPPER THOMSON POINT (103.8323 1.35765) 328 SHANGRI-LA POINT (103.83839 1.36797) 329 TOWNSVILLE POINT (103.84847 1.36535) 330 MARYMOUNT POINT (103.84489 1.35436) 331 TUAS VIEW EXTENSION POINT (103.62761 1.25238) [332 rows x 2 columns]
In [35]:
figgreymrtroute2 = px.line_map(
SGmaproute_exploded,
lat="lat",
lon="lon",
color="route",
color_discrete_sequence=["black"],
line_group="line_id",
hover_name="pattern",
hover_data=["route"],
title="MRT Routes in Singapore",
labels={"route": "Route Name", "pattern": "Pattern Name"},
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
height=800,
map_style="carto-positron"
)
figNONTARGET = px.choropleth_map(
dfNonTarget,
geojson=dfNonTarget.geometry,
locations= dfNonTarget.index,
hover_name="SUBZONE_N",
color_discrete_sequence=["grey"],
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
opacity=0.6,
height=600,
map_style = 'carto-positron')
In [36]:
figNEWline = go.Figure(data = [*figGAPKluster.data, *figNONTARGET.data, *figNAK.data, *figNA3.data, *figstation.data, *figmrtroute.data], layout = figGAPKluster.layout)
figNEWline.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10,)
In [37]:
SZonENLline = ["FRANKEL", "PLAB", "TRAFALGAR", "SELETAR HILLS", "SENGKANG WEST", "SELETAR AEROSPACE PARK", "YISHUN EAST"]
ENLine = SGsubzone[SGsubzone["SUBZONE_N"].isin(SZonENLline)]
#print(ENLine.columns)
#Drop Name column
ENLine = ENLine.drop("Name", axis=1)
#Rename SUBZONE_N to NAME
ENLine = ENLine.rename(columns={'SUBZONE_N': 'NAME'})
ExistingMRTstations = ["MARINE TERRACE", "KEMBANGAN", "KAKI BUKIT", "DEFU", "HOUGANG", "YISHUN"]
ENLineInterchange = SGmrtstations[SGmrtstations["NAME"].isin(ExistingMRTstations)]
#Drop 3 out of 4 Hougangs
ENLineInterchange = ENLineInterchange.drop([17, 441, 442])
In [38]:
#print(ENLine.columns)
#print(ENLineInterchange.columns)
ENLineMRTstations = pd.concat([ENLine,ENLineInterchange], join='inner')
ENLineMRTstations = ENLineMRTstations.drop(['INC_CRC', 'FMEL_UPD_D'], axis=1)
#print(ENLineMRTstations.columns)
#print(ENLineMRTstations["NAME"])
# Sort the stations
station_order = ['MARINE TERRACE', 'FRANKEL', 'KEMBANGAN', 'KAKI BUKIT', 'PLAB', 'DEFU', 'HOUGANG', 'TRAFALGAR', 'SELETAR HILLS', 'SENGKANG WEST', 'SELETAR AEROSPACE PARK', 'YISHUN EAST', 'YISHUN']
ENLineMRTstations['sequence'] = ENLineMRTstations['NAME'].apply(
lambda x: station_order.index(x))
ENLineMRTstations = ENLineMRTstations.sort_values(by="sequence")
print(ENLineMRTstations["NAME"])
181 MARINE TERRACE 117 FRANKEL 48 KEMBANGAN 392 KAKI BUKIT 324 PLAB 443 DEFU 16 HOUGANG 242 TRAFALGAR 248 SELETAR HILLS 237 SENGKANG WEST 263 SELETAR AEROSPACE PARK 305 YISHUN EAST 89 YISHUN Name: NAME, dtype: object
In [39]:
figENL = px.scatter_map(
ENLineMRTstations,
#text = "NAME",
lat = 'lat',
lon = 'lon',
hover_name = 'NAME',
color_discrete_sequence=["#00BFFF"],
)
figENLroute = px.line_map(
ENLineMRTstations,
lat="lat",
lon="lon",
color_discrete_sequence=["#00BFFF"],
hover_name="NAME",
title="East Coast – North-East Link",
center={"lat": 1.3521, "lon": 103.8198},
zoom=10,
height=800,
map_style="carto-positron"
)
'''
figENL.update_traces(textposition='middle right',
textfont=dict(
size=12,
color='black',
))
'''
fig = go.Figure(data = [*figstation.data, *figmrtroute.data, *figENL.data, *figENLroute.data])
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
map_zoom=10)
In [40]:
fig = go.Figure(go.Scattermap(
mode = "markers+text+lines",
lat=ENLineMRTstations["lat"],
lon=ENLineMRTstations["lon"],
marker = {'size': 12, 'symbol': ["rail-metro"]*13},
text = ["EN1: Marine Terrace",
"EN2: Frankel",
"EN3: Kembangan",
"EN4: Kaki Bukit",
"EN5: Paya Lebar Air Base",
"EN6: Defu",
"EN7: Hougang",
"EN8: Trafalgar",
"EN9: Seletar Hills",
"EN10: Sengkang West",
"EN11: Seletar Aerospace Park",
"EN12: Yishun East",
"EN13: Yishun"],
textposition = "middle right",
textfont = dict(size=12, color="black", weight=900),
))
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
map_center={"lat": 1.3521, "lon": 103.8198},
height = 800,
map_zoom=10)
In [41]:
["EN1: Marine Terrace",
"EN2: Frankel",
"EN3: Kembangan",
"EN4: Kaki Bukit",
"EN5: Paya Lebar Air Base",
"EN6: Defu",
"EN7: Hougang",
"EN8: Trafalgar",
"EN9: Seletar Hills",
"EN10: Sengkang West",
"EN11: Seletar Aerospace Park",
"EN12: Yishun East",
"EN13: Yishun"]
Out[41]:
['EN1: Marine Terrace', 'EN2: Frankel', 'EN3: Kembangan', 'EN4: Kaki Bukit', 'EN5: Paya Lebar Air Base', 'EN6: Defu', 'EN7: Hougang', 'EN8: Trafalgar', 'EN9: Seletar Hills', 'EN10: Sengkang West', 'EN11: Seletar Aerospace Park', 'EN12: Yishun East', 'EN13: Yishun']
In [42]:
#!jupyter nbconvert --to html --execute --embed-images Singapore.ipynb